Note: Open scripts.Rproj first, then script. To easily use relative paths, click the down button next to knit and then click “Knit Directory –> Project Directory”. This should make loading and saving files much easier.
Produce a coverage heatmap for each subset of samples selected from the network analysis.
Red text indicates regions that require the user to modify. Regardless, the user should check over all code blocks to ensure that everything is running correctly.
Load packages.
library(tidyverse)
library(maditr)
library(gplots)
library(viridis)
# library(htmlwidgets)
# library(plotly)
# library(grid)
# library(cowplot)
Change file names and condition variables where appropriate.
# Input metadata file
metadata.file <- "mag_metadata.tsv"
# Input coverage file
cov.file <- "coverage.tsv.gz"
# Output coverage results
prefix <- "mag2scaffold.subset"
Load metadata file.
metadata <- read.table(metadata.file,
sep='\t', header=TRUE, check.names=FALSE,
stringsAsFactors=FALSE, comment.char='')
Load coverm genome MAG coverage results - clean-up
sample names.
cov <- read.table(cov.file, sep='\t',
header=TRUE, check.names=FALSE,
stringsAsFactors=FALSE)
Open subset files and extract gene names. Adjust file names as needed.
clade.green <- read.table("YNP_onlyMAGs_GreenClade.txt", sep=',', header=FALSE, check.names=FALSE, stringsAsFactors=FALSE) %>% select(V1) %>% as.vector() %>% .[["V1"]]
clade.orange <- read.table("YNP_onlyMAGs_OrangeClade.txt", sep=',', header=FALSE, check.names=FALSE, stringsAsFactors=FALSE) %>% select(V1) %>% as.vector() %>% .[["V1"]]
clade.purple <- read.table("YNP_onlyMAGs_PurpleClade.txt", sep=',', header=FALSE, check.names=FALSE, stringsAsFactors=FALSE) %>% select(V1) %>% as.vector() %>% .[["V1"]]
clade.red. <- read.table("YNP_onlyMAGs_RedClade.txt", sep=',', header=FALSE, check.names=FALSE, stringsAsFactors=FALSE) %>% select(V1) %>% as.vector() %>% .[["V1"]]
Extract coverage values from coverm genome results file
and format into a matrix for plotting.
# Build matrix using: Group_ID<->sample
cov.TPM <- cov %>%
dcast(Genome ~ Sample, value.var = "TPM") %>%
filter(!Genome %in% c("unmapped")) %>%
column_to_rownames("Genome")
Colors for plotting
# Row colors (ordered by Group_ID in TPM matrix)
row.colors <- data.frame("MAG_ID" = rownames(cov.TPM))
row.colors <- merge(row.colors, metadata,
by.x = "MAG_ID", by.y = "MAG_ID",
all.x = TRUE, all.y = FALSE)
# Z-score scaled TPM by MAG (row) using the 'scale' function
selected.names <- clade.green
heatmap.2(cov.TPM[selected.names, ] %>% t() %>% scale() %>% t() %>% as.matrix(),
main="Green clade - TPM z-score scaled rows (groups)",
Colv=FALSE,
dendrogram="row",
col=viridis,
trace="none", # Draw "trace" line
key=TRUE, # Show color-key
margins=c(8,8), # Numeric vector of length 2 containing the margins for column and row names, respectively.
offsetRow=0,
offsetCol=0,
cexRow=0.05, # Positive numbers, used as cex.axis in for the row or column axis labeling.
cexCol=0.4,
#cellnote=formatC(cov.TPM, big.mark=",", format = "fg"),
notecex=0.05,
RowSideColors=as.vector(row.colors[row.colors$MAG_ID %in% selected.names, ]$Label_color),
)
# Z-score scaled TPM by MAG (row) using the 'scale' function
selected.names <- clade.orange
heatmap.2(cov.TPM[selected.names, ] %>% t() %>% scale() %>% t() %>% as.matrix(),
main="Orange clade - TPM z-score scaled rows (groups)",
Colv=FALSE,
dendrogram="row",
col=viridis,
trace="none", # Draw "trace" line
key=TRUE, # Show color-key
margins=c(8,8), # Numeric vector of length 2 containing the margins for column and row names, respectively.
offsetRow=0,
offsetCol=0,
cexRow=0.05, # Positive numbers, used as cex.axis in for the row or column axis labeling.
cexCol=0.4,
#cellnote=formatC(cov.TPM, big.mark=",", format = "fg"),
notecex=0.05,
RowSideColors=as.vector(row.colors[row.colors$MAG_ID %in% selected.names, ]$Label_color),
)
# Z-score scaled TPM by MAG (row) using the 'scale' function
selected.names <- clade.purple
heatmap.2(cov.TPM[selected.names, ] %>% t() %>% scale() %>% t() %>% as.matrix(),
main="Purple clade - TPM z-score scaled rows (groups)",
Colv=FALSE,
dendrogram="row",
col=viridis,
trace="none", # Draw "trace" line
key=TRUE, # Show color-key
margins=c(8,8), # Numeric vector of length 2 containing the margins for column and row names, respectively.
offsetRow=0,
offsetCol=0,
cexRow=0.05, # Positive numbers, used as cex.axis in for the row or column axis labeling.
cexCol=0.4,
#cellnote=formatC(cov.TPM, big.mark=",", format = "fg"),
notecex=0.05,
RowSideColors=as.vector(row.colors[row.colors$MAG_ID %in% selected.names, ]$Label_color),
)
# Z-score scaled TPM by MAG (row) using the 'scale' function
selected.names <- clade.red.
heatmap.2(cov.TPM[selected.names, ] %>% t() %>% scale() %>% t() %>% as.matrix(),
main="Red clade - TPM z-score scaled rows (groups)",
Colv=FALSE,
dendrogram="row",
col=viridis,
trace="none", # Draw "trace" line
key=TRUE, # Show color-key
margins=c(8,8), # Numeric vector of length 2 containing the margins for column and row names, respectively.
offsetRow=0,
offsetCol=0,
cexRow=0.05, # Positive numbers, used as cex.axis in for the row or column axis labeling.
cexCol=0.4,
#cellnote=formatC(cov.TPM, big.mark=",", format = "fg"),
notecex=0.05,
RowSideColors=as.vector(row.colors[row.colors$MAG_ID %in% selected.names, ]$Label_color),
)
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] viridis_0.6.5 viridisLite_0.4.2 gplots_3.2.0 maditr_0.8.4
## [5] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [9] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [13] ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3 renv_1.0.11
## [5] bitops_1.0-9 KernSmooth_2.23-24 gtools_3.9.5 stringi_1.8.4
## [9] hms_1.1.3 digest_0.6.37 magrittr_2.0.3 caTools_1.18.3
## [13] evaluate_1.0.1 grid_4.3.1 timechange_0.3.0 fastmap_1.2.0
## [17] jsonlite_1.8.9 gridExtra_2.3 fansi_1.0.6 scales_1.3.0
## [21] jquerylib_0.1.4 cli_3.6.3 rlang_1.1.4 munsell_0.5.1
## [25] withr_3.0.2 cachem_1.1.0 yaml_2.3.10 tools_4.3.1
## [29] tzdb_0.4.0 colorspace_2.1-1 vctrs_0.6.5 R6_2.5.1
## [33] lifecycle_1.0.4 pkgconfig_2.0.3 pillar_1.9.0 bslib_0.8.0
## [37] gtable_0.3.6 glue_1.8.0 data.table_1.16.2 xfun_0.49
## [41] tidyselect_1.2.1 rstudioapi_0.17.1 knitr_1.49 htmltools_0.5.8.1
## [45] rmarkdown_2.29 compiler_4.3.1